home *** CD-ROM | disk | FTP | other *** search
- /*
- *
- *
- * The information in this document is subject to change
- * without notice and should not be construed as a commitment
- * by Digital Equipment Corporation or by DECUS.
- *
- * Neither Digital Equipment Corporation, DECUS, nor the authors
- * assume any responsibility for the use or reliability of this
- * document or the described software.
- *
- * Copyright (C) 1980, DECUS
- *
- *
- * General permission to copy or modify, but not for profit, is
- * hereby granted, provided that the above copyright notice is
- * included and reference made to the fact that reproduction
- * privileges were granted by DECUS.
- *
- */
-
- #define SMALLC /* comment out for unix */
- #define NOLOWER /* comment out if lower case passed in command line */
- #define GOODHELP /* comment out to disable -h option */
-
- #ifdef SMALLC
- #include <stdiocb.h> /* addresses, orgs to link to system routines */
- #include <libasm.h> /* addresses, orgs to link to printf, etc.*/
- #else
- #include <stdio.h>
- #endif
- /*
- * grep.
- *
- * Original version ran on the Decus compiler or on vms.
- * Converted for BDS compiler (under CP/M-80), 20-Jan-83, by Chris Kern.
- *
- * Converted to IBM PC with CI-C86 C Compiler June 1983 by David N. Smith
- *
- * Converted to Small C Version 2.0 (under CP/M-80) by C. Bingham 860724.1
- * Compiles under BSD 4.2 cc when SMALLC and NOLOWER not defined.
- * Several corrections were made to error messages and pattern checking.
- * In addition, the capability to distinguish upper and lower case letters
- * was added. However, in CP/M lower case is translated to upper in the
- * command line. Thus when NOLOWER is defined at compilation, grep assumes
- * all letters are lower case unless escaped by '\', in which case they are
- * always taken to be upper case. Similarly, in CP/M, one cannot have
- * embedded blanks or tabs in an argument on the command line. Thus when
- * NOLOWER is defined, '_' and '`' are interpreted as blank and tab unless
- * escaped by '\' ('\_' and '\`').
- *
- * See below for more information.
- *
- */
-
- #ifdef GOODHELP
/*
 * Usage text printed by the -h option, one string per output line.
 * The strings are kept as separate variables and gathered into docs[]
 * at run time by setdocs(); docs14 is the empty string that help()
 * treats as the end of the list.
 */
char *docs0 = "Grep searches a file for a given pattern. Execute by";
char *docs1 = " grep [flags] regular_expression file_list";
char *docs2 = " ";
char *docs3 = "Flags are single characters preceded by '-':";
char *docs4 = " -c Only a count of matching lines is printed";
char *docs5 =
" -f Print file name for matching lines switch; see below";
char *docs6 = " -n Each line is preceded by its line number";
char *docs7 = " -v Only print non-matching lines";
char *docs8 = " ";
char *docs9 = "The file_list is a list of files.";
char *docs10 = " ";
char *docs11 = "The file name is printed only if more than one file is named.";
char *docs12 =
"The -f flag reverses this action (print name if one file, not if more).";
char *docs13 = " ";
char *docs14 = "\0";
/* Table of the lines above; Small C stores the pointers in an int array. */
#ifdef SMALLC
int docs[15];
#else
char *docs[15];
#endif
/*
 * Pattern-syntax help printed by the -h option, one string per output
 * line.  The strings are gathered into patdoc[] at run time by
 * setpatdoc(); the last one is the empty string that help() treats as
 * the end of the list.  The NOLOWER build has two extra lines that
 * describe the '_' and '`' substitutes for space and tab.
 */
#ifdef NOLOWER
char *patdoc0 =
"The regular_expression defines the pattern to search for. Upper case";
char *patdoc1 =
"letters must be preceded by '\\'.";
#else
char *patdoc0 =
"The regular_expression defines the pattern to search for. Upper and";
char *patdoc1 =
"lower case are distinguished.";
#endif
char *patdoc2 = " ";
char *patdoc3 =
"x An ordinary character (not mentioned below) matches that character.";
char *patdoc4 =
"'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.";
char *patdoc5 =
"'$' A dollar-sign at the end of an expression matches the end of a line.";
char *patdoc6 =
"'^' A circumflex at the beginning of an expression matches the";
char *patdoc7 =
" beginning of a line. Thus '^$' matches an empty line.";
char *patdoc8 =
"'.' A period matches any character except \"newline\".";
char *patdoc9 =
"':a' A colon matches a class of characters described by the following";
char *patdoc10 =
"':d' character. \":a\" matches any alphabetic, \":d\" matches digits,";
char *patdoc11 =
"':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and";
char *patdoc12 =
"': ' other non-printing control characters.";
char *patdoc13 =
"'*' An expression followed by an asterisk matches zero or more";
char *patdoc14 =
" occurrences of that expression: \"fo*\" matches \"f\", \"fo\" and";
char *patdoc15 =
" \"foo\"; \"t.*e\" matches \"te\", \"the\", \"table\", etc.";
char *patdoc16 =
"'+' An expression followed by a plus sign matches one or more";
char *patdoc17 =
" occurrences of that expression: \"fo+\" matches \"fo\", not \"f\".";
char *patdoc18 =
"'-' An expression followed by a minus sign matches 0 or 1 occurrences";
char *patdoc19 =
" of the expression. \"te-n\" matches \"tn\" and \"ten\", not \"teen\".";
char *patdoc20 =
"'[]' A string enclosed in square brackets matches any character in";
char *patdoc21 =
" that string, but no others. If the first character in the";
char *patdoc22 =
" string is a circumflex ('^'), the expression matches any character";
char *patdoc23 =
" except \"newline\" and the characters in the string. For";
char *patdoc24 =
" example, \"[xyz]\" matches \"x\", \"y\" or \"z\", while \"[^xyz]\"";
char *patdoc25 =
" matches \"a\" or \"b\" but not \"x\". A range of characters may be";
char *patdoc26 =
" specified by two characters separated by \"-\". Thus,";
char *patdoc27 =
" [a-z] matches any lower case letter, while [z-a] never matches.";
char *patdoc28 =
"The concatenation of regular expressions is a regular expression.";
#ifdef NOLOWER
char *patdoc29 =
"A space must be coded by '_' and a tab by '`'. To obtain these characters";
char *patdoc30 =
"use '\\_' and '\\`'.";
char *patdoc31 = "\0";
#else
char *patdoc29 = "\0";
#endif
/* Table of the lines above; Small C stores the pointers in an int array. */
#ifdef SMALLC
#ifdef NOLOWER
int patdoc[32];
#else
int patdoc[30];
#endif
#else
#ifdef NOLOWER
char *patdoc[32];
#else
char *patdoc[30];
#endif
#endif
- #endif
-
/* Buffer capacities, in bytes. */
#define LMAX 512        /* line buffer lbuf[] */
#define PMAX 256        /* compiled-pattern buffer pbuf[] */

/* Character the matcher treats as the end of a text line. */
#define ENDSTR '\n'

/*
 * Opcodes written into pbuf[] by compile()/cclass() and interpreted
 * by pmatch().
 */
#define CHARR 1         /* literal: the character to match follows */
#define BOL 2           /* beginning of line */
#define EOL 3           /* end of line */
#define ANY 4           /* any character except end of line */
#define CLASS 5         /* [...]: a count byte and the members follow */
#define NCLASS 6        /* [^...]: same layout as CLASS */
#define STAR 7          /* zero or more of the sub-pattern that follows */
#define PLUS 8          /* one or more of the sub-pattern that follows */
#define MINUS 9         /* zero or one of the sub-pattern that follows */
#define ALPHA 10        /* :a  a letter */
#define DIGIT 11        /* :d  a digit */
#define NALPHA 12       /* :n  a letter or a digit */
#define PUNCT 13        /* ": " a character not above ' ', newline excluded */
#define RANGE 14        /* inside a class: low and high bounds follow */
#define ENDPAT 15       /* end of a pattern or sub-pattern */

/* Option counters, bumped by main() for each -c, -f, -n and -v seen. */
int cflag, fflag, nflag, vflag;
int nfile;              /* arguments left over as file names */
int debug = 0; /* Set for debug code */

char *pp;               /* next free byte of pbuf[] while compiling */
char lbuf[LMAX];        /* line currently being examined */
char pbuf[PMAX];        /* compiled pattern */
- /*******************************************************/
- main(argc, argv)
- int argc;
- #ifdef SMALLC
- int argv[]; /* no *char arrays in small c*/
- #else
- char *argv[];
- #endif
- {
- char *p;
- int c, i;
- int gotpattern;
- FILE *f;
- cflag = fflag = nflag = vflag = debug = 0;
- #ifdef GOODHELP
- setdocs();
- setpatdoc();
- #endif
- if (argc <= 1)
- usage("No arguments");
- nfile = argc-1;
- gotpattern = 0;
- for (i=1; i < argc; ++i) {
- p = argv[i];
- if (*p == '-') {
- ++p;
- while ((c = *p++)) {
- switch(tolower(c)) {
- #ifdef GOODHELP
- case 'h':
- help(docs);
- help(patdoc);
- break;
- #endif
- case 'c':
- ++cflag;
- break;
- case 'd':
- ++debug;
- break;
- case 'f':
- ++fflag;
- break;
- case 'n':
- ++nflag;
- break;
- case 'v':
- ++vflag;
- break;
- default:
- usage("Unknown flag");
- }
- }
- argv[i] = 0;
- --nfile;
- } else if (!gotpattern) {
- compile(p);
- argv[i] = 0;
- ++gotpattern;
- --nfile;
- }
- }
- if (!gotpattern)
- usage("No pattern");
- if (nfile == 0)
- grep(stdin, "stdin");
- else {
- fflag = fflag ^ (nfile > 1);
- for (i=1; i < argc; ++i) {
- if ((p = argv[i])) {
- if ((f=fopen(p, "r")) == NULL)
- cant(p);
- else {
- grep(f, p);
- fclose(f);
- }
- }
- }
- }
- }
- /*******************************************************/
/*
 * Tell the user, on stderr, that the file named s could not be opened.
 */
cant(s)
char *s;
{
    fputs("grep: cannot open ", stderr);
    fputs(s, stderr);
    putc('\n', stderr);
}
- /*******************************************************/
- #ifdef GOODHELP
/*
 * Write a table of help lines to stderr, one per line.  The table ends
 * at the first entry that is an empty string; that entry is not
 * printed.
 */
help(hp)
#ifdef SMALLC
int hp[];
#else
char *hp[];
#endif
{
    int idx;
    char *text;

    idx = 0;
    text = hp[idx];
    while (*text) {
        fprintf(stderr,"%s\n",text);
        text = hp[++idx];
    }
}
- #endif
- /*******************************************************/
/*
 * Print the complaint s followed by the command synopsis on stderr,
 * then terminate with exit status 1.  The synopsis mentions -h only
 * when the help option is compiled in.
 */
usage(s)
char *s;
{
    char *synopsis;     /* form that names the input files */
    char *redirect;     /* form that reads redirected input */

#ifdef GOODHELP
    synopsis =
"Usage: grep [-cfnvh] pattern [ file1 [ file2 ... ] ] [ > file ]\n";
    redirect = " or: grep [-cfnvh] pattern < file1 [ > file ]\n";
#else
    synopsis =
"Usage: grep [-cfnv] pattern [ file1 [ file2 ... ] ] [ > file ]\n";
    redirect = " or: grep [-cfnv] pattern < file1 [ > file ]\n";
#endif
    fprintf(stderr,"grep: %s\n",s);
    fputs(synopsis,stderr);
    fputs(redirect,stderr);
#ifdef GOODHELP
    fputs("To get help, use grep -h\n",stderr);
#endif
    exit(1);
}
- /*******************************************************/
- compile(source)
- char *source; /* Pattern to compile */
- /*
- * Compile the pattern into global pbuf[]
- */
- {
- char *s; /* Source string pointer */
- char *lp; /* Last pattern pointer */
- int c; /* Current character */
- int o; /* Temp */
- char *spp; /* Save beginning of pattern */
- #ifdef SMALLC
- int cclass(); /* Compile class routine */
- #else
- char *cclass();
- #endif
- s = source;
- if (debug)
- fprintf(stderr,"Pattern = \"%s\"\n", s);
- pp = pbuf;
- while ((c = *s++)) {
- /*
- * STAR, PLUS and MINUS are special.
- */
- if (c == '*' || c == '+' || c == '-') {
- if (pp == pbuf ||
- (o = *lp) == BOL ||
- o == EOL ||
- o == STAR ||
- o == PLUS ||
- o == MINUS
- )
- badpat("Illegal occurrance op.", source, s);
- store(ENDPAT);
- store(ENDPAT);
- spp = pp; /* Save pattern end */
- while (--pp > lp) /* Move pattern down */
- *pp = *(pp-1); /* one byte */
- if(c == '*') *pp = STAR;
- else
- {
- if(c == '-') *pp = MINUS;
- else *pp = PLUS;
- }
- pp = spp; /* Restore pattern end */
- continue;
- }
- /*
- * All the rest.
- */
- lp = pp; /* Remember start */
- switch(c) {
- case '^':
- store(BOL);
- break;
- case '$':
- store(EOL);
- break;
- case '.':
- store(ANY);
- break;
- case '[':
- s = cclass(source, s);
- break;
- case ':':
- if (*s) {
- c = *s++;
- switch(tolower(c)) {
- case 'a':
- store(ALPHA);
- break;
- case 'd':
- store(DIGIT);
- break;
- case 'n':
- store(NALPHA);
- break;
- case ' ':
- store(PUNCT);
- break;
- default:
- badpat("Unknown : type", source, s);
- }
- break;
- }
- else badpat("No : type", source, s);
- case '\\':
- if (*s)
- c = *s++;
- #ifdef NOLOWER
- store(CHARR);
- store(c);
- break;
- #endif
- default:
- #ifdef NOLOWER
- switch(c)
- {
- case '_':
- c = ' ';
- break;
- case '`':
- c = '\t';
- break;
- default:
- c = tolower(c);
- }
- #endif
- store(CHARR);
- store(c);
- }
- }
- store(ENDPAT);
- store(0); /* Terminate string */
- if (debug) {
- for (lp = pbuf; lp < pp;) {
- if ((c = (*lp++ & 255)) < ' ')
- fprintf(stderr,"%2xh ", c);
- else
- fprintf(stderr,"%c ", c);
- }
- putc('\n',stderr);
- }
- }
- /*******************************************************/
- /*char * (not legal for Small C) */
- #ifdef SMALLC
- cclass(source, src)
- #else
- char *cclass(source, src)
- #endif
- char *source; /* Pattern start -- for error msg. */
- char *src; /* Class start */
- /*
- * Compile a class (within [])
- */
- {
- char *s; /* Source pointer */
- char *cp; /* Pattern start */
- int c; /* Current character */
- int o; /* Temp */
- s = src;
- o = CLASS;
- if (*s == '^') {
- ++s;
- o = NCLASS;
- }
- store(o);
- cp = pp;
- store(0); /* Byte count */
- while ((c = *s++))
- {
- if (c == ']' && pp - cp > 1)
- break;
- if (c == '\\') { /* Store quoted char */
- if ((c = *s++) == '\0') /* Gotta get something */
- badpat("Class terminates badly", source, --s);
- else store(c);
- }
- else if ((c == '-') &&
- ((pp - cp) > 1) && (*s != ']') && (*s != '\0') )
- {
- c = *(pp-1); /* Range start */
- *(pp-1) = RANGE; /* Range signal */
- store(c); /* Re-store start */
- c = *s++; /* Get end char */
- #ifdef NOLOWER
- switch(c)
- {
- case '\\':
- c = *s++;
- if(c == '\0')
- badpat("Class terminates badly", source, --s);
- break;
- case '_':
- c = ' ';
- break;
- case '`':
- c = '\t';
- break;
- default:
- c = tolower(c);
- }
- #else
- if(c == '\\') /* Store quoted character */
- {
- if((c = *s++) == '\0')
- badpat("Class terminates badly", source, --s);
- }
- #endif
- store(c); /* Store it */
- }
- else
- {
- #ifdef NOLOWER
- switch (c)
- {
- case '_':
- c = ' ';
- break;
- case '`':
- c = '\t';
- break;
- default:
- c = tolower(c);
- }
- #endif
- store(c); /* Store normal char */
- }
- }
- if (c != ']')
- badpat("Unterminated class", source, --s);
- if ((c = (pp - cp)) >= 256)
- badpat("Class too large", source, s);
- *cp = c;
- return(s);
- }
- /*******************************************************/
- store(op)
- char op;
- {
- if (pp >= &pbuf[PMAX])
- error("Pattern too complex\n");
- *pp++ = op;
- }
- /*******************************************************/
/*
 * Report a malformed pattern on stderr and give up through error().
 *
 * Prints the message with the whole pattern, then how far into the
 * pattern the compiler had read and the last character it read.  stop
 * must point past the first character of source, because the
 * character before stop is printed.
 */
badpat(message, source, stop)
char *message; /* Error message */
char *source; /* Pattern start */
char *stop; /* Pattern end */
{
    int offset;         /* %d needs an int, not a pointer difference */

    offset = stop - source;
    fprintf(stderr,"grep: %s, pattern is \"%s\"\n",message,source);
    fprintf(stderr," Stopped at byte %d \'%c\'\n",
        offset,*(stop-1));
    error("grep: Bad pattern\n");
}
- /*******************************************************/
- grep(fp, fn)
- FILE *fp; /* File to process */
- char *fn; /* File name (for -f option) */
- /*
- * Scan the file for the pattern in pbuf[]
- */
- {
- int lno, count, m;
- lno = 0;
- count = 0;
- while (fgets(lbuf, LMAX, fp)) {
- ++lno;
- m = match();
- if ((m && !vflag) || (!m && vflag)) {
- ++count;
- if (!cflag) {
- if (fflag)
- printf("%s:",fn);
- if (nflag)
- printf("%d:", lno);
- printf("%s", lbuf);
- }
- }
- }
- if (cflag) {
- if (fflag)
- printf("%s: ",fn);
- printf("%d\n", count);
- }
- }
- /*******************************************************/
- match()
- /*
- * Match the current line (in lbuf[]), return 1 if it does.
- */
- {
- char *l; /* Line pointer */
- #ifdef SMALLC
- int pmatch();
- #else
- char *pmatch();
- #endif
- for (l = lbuf; *l; l++) {
- if (pmatch(l, pbuf))
- return(1);
- }
- return(0);
- }
- /*******************************************************/
- #ifdef SMALLC
- pmatch(line, pattern)
- #else
- char *pmatch(line, pattern)
- #endif
- char *line; /* (partial) line to match */
- char *pattern; /* (partial) pattern to match */
- {
- char *l; /* Current line pointer */
- char *p; /* Current pattern pointer */
- char c; /* Current character */
- char *e; /* End for STAR and PLUS match */
- int op; /* Pattern operation */
- int n; /* Class counter */
- char *are; /* Start of STAR match */
- l = line;
- if (debug > 1)
- fprintf(stderr,"pmatch(\"%s\")\n", line);
- p = pattern;
- while ((op = *p++) != ENDPAT) {
- if (debug > 1)
- fprintf(stderr,"byte[%d] = %xh, '%c',op = %xh\n",
- l-line, *l, *l, op);
- switch(op) {
- case CHARR:
- if (*l != *p++)
- return(0);
- l++;
- break;
- case BOL:
- if (l != lbuf)
- return(0);
- break;
- case EOL:
- if (*l != ENDSTR)
- return(0);
- break;
- case ANY:
- if (*l++ == ENDSTR)
- return(0);
- break;
- case DIGIT:
- if ((c = *l++) < '0' || (c > '9'))
- return(0);
- break;
- case NALPHA:
- case ALPHA:
- c = *l++;
- if (c >= 'a' && c <= 'z')
- break;
- else if (c >= 'A' && c <= 'Z')
- break;
- else if(op == NALPHA && c >= '0' && c <= '9')
- break;
- return(0);
- case PUNCT:
- c = *l++;
- if (c == ENDSTR || c > ' ')
- return(0);
- break;
- case CLASS:
- case NCLASS:
- c = *l++;
- n = *p++ & 255;
- do {
- if (*p == RANGE) {
- p += 3;
- n -= 2;
- if (c >= *(p-2) && c <= *(p-1))
- break;
- }
- else if (c == *p++)
- break;
- } while (--n > 1);
- if ((op == CLASS) == (n <= 1))
- return(0);
- if (op == CLASS)
- p += n - 2;
- break;
- case MINUS:
- e = pmatch(l, p); /* Look for a match */
- while (*p++ != ENDPAT); /* Skip over pattern */
- if (e) /* Got a match? */
- l = e; /* Yes, update string */
- break; /* Always succeeds */
- case PLUS: /* One or more ... */
- if ((l = pmatch(l, p)) == 0)
- return(0); /* Gotta have a match */
- case STAR: /* Zero or more ... */
- are = l; /* Remember line start */
- while ((*l != ENDSTR) && ((e = pmatch(l, p))))
- l = e; /* Get longest match */
- while (*p++ != ENDPAT); /* Skip over pattern */
- while (l >= are) { /* Try to match rest */
- if ((e = pmatch(l, p)))
- return(e);
- --l; /* Nope, try earlier */
- }
- return(0); /* Nothing else worked */
- default:
- fprintf(stderr,"Bad op code %d\n", op);
- error("Cannot happen -- match\n");
- }
- }
- return(l);
- }
- /*******************************************************/
/*
 * Write the message s to stderr as it stands and terminate with exit
 * status 1.
 */
error(s)
char *s;
{
    fprintf(stderr,"%s",s);
    exit(1);
}
/*
 * Return the lower-case form of c when c is one of 'A'..'Z';
 * any other character is returned unchanged.
 */
tolower(c)
char c;
{
    if (c < 'A' || c > 'Z')
        return c;
    return c + ('a' - 'A');
}
- #ifdef GOODHELP
- setdocs()
- {
- docs[0] = docs0;docs[1] = docs1;docs[2] = docs2;
- docs[3] = docs3;docs[4] = docs4;docs[5] = docs5;
- docs[6] = docs6;docs[7] = docs7;docs[8] = docs8;
- docs[9] = docs9;docs[10]=docs10;docs[11]=docs11;
- docs[12]=docs12;docs[13]=docs13;docs[14]=docs14;
- }
- setpatdoc()
- {
- patdoc[0] = patdoc0;patdoc[1] = patdoc1;patdoc[2] = patdoc2;
- patdoc[3] = patdoc3;patdoc[4] = patdoc4;patdoc[5] = patdoc5;
- patdoc[6] = patdoc6;patdoc[7] = patdoc7;patdoc[8] = patdoc8;
- patdoc[9] = patdoc9;patdoc[10]=patdoc10;patdoc[11]=patdoc11;
- patdoc[12]=patdoc12;patdoc[13]=patdoc13;patdoc[14]=patdoc14;
- patdoc[15]=patdoc15;patdoc[16]=patdoc16;patdoc[17]=patdoc17;
- patdoc[18]=patdoc18;patdoc[19]=patdoc19;patdoc[20]=patdoc20;
- patdoc[21]=patdoc21;patdoc[22]=patdoc22;patdoc[23]=patdoc23;
- patdoc[24]=patdoc24;patdoc[25]=patdoc25;patdoc[26]=patdoc26;
- patdoc[27]=patdoc27;patdoc[28]=patdoc28;patdoc[29]=patdoc29;
- #ifdef NOLOWER
- patdoc[30]=patdoc30;patdoc[31]=patdoc31;
- #endif
- }
- #endif